********************************************************************
** To construct dataset of US state level value added by industry **
********************************************************************


import delimited gsp_sic_all.csv , varnames(1) clear

drop in 4057/37536   // limit to GDP: other rows contain GDP elements
drop region componentid componentname  
compress

forvalues i = 9/43 {
   local x = `i' + 1954 
   rename v`i' yr`x'
   }

rename geofips state
replace state = substr(state , 1, 2)
destring(state) , replace

rename industryid indcensus
destring(indcensus) , replace

rename industryclassification sic
replace sic = "." if real(sic) == .
destring(sic) , replace

replace sic = 1 if indcensus == 4 
replace sic = 7 if indcensus == 5 
replace sic = 372 if indcensus == 22 
drop if mi(sic)

rename geoname state_name

** reshape as state-year panel **
reshape long yr , i(state state_name indcensus description) j(year) 
destring(year) , replace
rename yr va

drop indcensus description
reshape wide va , i(state state_name year) j(sic) 

rename va1 va1_2
rename va7 va7_9

foreach i in 11 15 16 17 18 19 37 43 66 68 69 71 74 77 84 85 87 {
   gen int va`i' = .
   }   // creating empty sic to ease following loop

forvalues i = 10/49 {
   destring(va`i') , force replace  // force: convert non-numeric to missing
   label variable va`i' "Value added SIC `i'"
   }

rename va372 va372_379   
   
forvalues i = 60/88 {
   destring(va`i') , force replace  // force: convert non-numeric to missing
   label variable va`i' "Value added SIC `i'"
   }
   
foreach i in 1_2 7_9 371 372_379 {
   destring(va`i') , force replace  
   label variable va`i' "Value added SIC `i'"
   }
   
foreach i in 11 15 16 17 18 19 37 43 66 68 69 71 74 77 84 85 87 {
   drop va`i'
   }     
   
compress
sort state year
save indy.dta , replace


*** check data ***
use indy.dta , clear

gen vasum1 = va2 + va72   
gen error1 = va1 - vasum1
summ error1   // all sectors

egen vasum2 = rowtotal(va3 va6 va11 va12 va36 va47 va48 va49 va57)
gen error2 = va2 - vasum2
summ error2   // private sector

egen vasum12 = rowtotal(va14-va24 va26-va35)
gen error12 = va12 - vasum12
summ error12   // manufacturing

